In [4]:
!pip install folium
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import folium
from folium.plugins import HeatMap
from folium.plugins import HeatMapWithTime
import datetime
import statsmodels.formula.api as sm 
Requirement already satisfied: folium in /opt/conda/lib/python3.7/site-packages (0.10.1)
Requirement already satisfied: jinja2>=2.9 in /opt/conda/lib/python3.7/site-packages (from folium) (2.10.1)
Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from folium) (1.17.0)
Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from folium) (2.22.0)
Requirement already satisfied: branca>=0.3.0 in /opt/conda/lib/python3.7/site-packages (from folium) (0.3.1)
Requirement already satisfied: MarkupSafe>=0.23 in /opt/conda/lib/python3.7/site-packages (from jinja2>=2.9->folium) (1.1.1)
Requirement already satisfied: idna<2.9,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->folium) (2.8)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->folium) (3.0.4)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->folium) (1.25.3)
Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->folium) (2019.6.16)
Requirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from branca>=0.3.0->folium) (1.12.0)
In [5]:
original = pd.read_excel('traffic_sample.xlsx')
In [6]:
# Converting the Latitude and Longitude Attributes to a Float
original["Latitude"] = original["Latitude"].astype(float)
original["Longitude"] = original["Longitude"].astype(float)

original = original[original["Gender"] != "U"]
original = original[(original["Year"] != 0) & (original["Year"] < 2020) & (original["Year"] > 1900)]
In [7]:
filtered_cols = ["Date Of Stop", "Time Of Stop", "SubAgency", 
                 "Description", "Location", "Latitude", "Longitude",
                 "Accident", "Belts", "Personal Injury", "Property Damage",
                 "Fatal", "Alcohol", "Work Zone", "VehicleType",
                 "Year", "Make", "Violation Type", "Contributed To Accident",
                 "Race", "Gender", "DL State"]

# Can break up the criteria above to make the dataframe more tidy
sam = original[filtered_cols].copy()
In [10]:
def generate_map(loc = [39.1247, -77.1905], zoom = 10.5, tile = "openstreetmap"):
    res_map = folium.Map(location = loc, zoom_start = zoom, control_scale = True, tiles = tile)
    
    # Add the Tile (or Style) of the Map
    folium.TileLayer('openstreetmap').add_to(res_map)
    folium.TileLayer('Stamen Watercolor').add_to(res_map)
    folium.TileLayer('Stamen Toner').add_to(res_map)
    return res_map
    
In [11]:
# This Function returns the designated color assigned to a race.
def color_select(race):
    ethnicity = {'ASIAN': "#ed8134", # Orange
                 'BLACK': "#391cba", #Indigo
                 'HISPANIC': "#119992", #Teal 
                 'NATIVE AMERICAN': "#9412b8", # Violet 
                 'OTHER': "#127bb8", # Blue
                 'WHITE': "#e81c1c"} # Red
    
    return ethnicity[race]

Map Exploring the Race and Gender

In [30]:
# Creating an Empty Map
map_total = generate_map()

# other mapping code (e.g. lines, markers etc.)
asian_fg = folium.FeatureGroup(name = "Asian") 
black_fg = folium.FeatureGroup(name = "Black") 
his_fg = folium.FeatureGroup(name = "Hispanic") 
na_fg = folium.FeatureGroup(name = "Native American") 
other_fg = folium.FeatureGroup(name = "Other") 
white_fg = folium.FeatureGroup(name = "White") 

race = {'ASIAN': asian_fg, 
        'BLACK': black_fg, 
        'HISPANIC': his_fg, 
        'NATIVE AMERICAN': na_fg, 
        'OTHER': other_fg, 
        'WHITE': white_fg} 

legend_html = '''
    <style>
        .circle {
          height: 10px;
          width: 10px;
          background-color: orange;
          border-radius: 50%;
         }
         .square {
              height: 10px;
              width: 10px;
              background-color: #ed8134;
         }
         div {
             display: inline-block;
         }
         legend {
             font-size: 13px
         }
         .triangle {
            width: 0;
            height: 0;
            border-left: 7.5px solid transparent;
            border-right: 7.5px solid transparent;
            border-bottom: 15px solid #ed8134;
         }
    </style>
    
    <div style="position: fixed;
        left: 50px; width: 150px;
        border:2px solid black; z-index:9999; font-size:12px; background-color: white;"> 
        <legend><b>Legend:</b></legend>
        <b>Race: </b><br>
        Asian: <div class = circle style = "background-color: #ed8134"> </div> <br>
        White: <div class = circle style = "background-color: #e81c1c"> </div><br>
        Black: <div class = circle style = "background-color: #391cba"> </div><br>
        Hispanic: <div class = circle style = "background-color: #119992"> </div><br>
        Native American: <div class = circle style = "background-color: #9412b8"> </div><br>
        Other: <div class = circle style = "background-color: #127bb8"> </div>
        <hr>
        <b>Gender: </b><br>
        Male: <div class = triangle> </div> <br>
        Female: <div class = square> </div> <br>

    </div>

     '''
map_total.get_root().html.add_child(folium.Element(legend_html))

for ind, row in sam.iterrows():
    
    entry = (folium.RegularPolygonMarker(location = [row["Latitude"],row["Longitude"]], popup = row["Description"], 
                                        color= color_select(row["Race"]), fill = True, weight = 1, 
                                        number_of_sides = 3 if row["Gender"] == "M" else 6, 
                                        radius = 4, opactity = .4))
    entry.add_to(race[row["Race"]])

for r in race:
    race[r].add_to(map_total)
    
folium.LayerControl().add_to(map_total)


map_total
Out[30]:

Bar Graph Relating Gender, Race, and the number of Traffic Violation

In [13]:
gr_df = sam.copy()
gr_df["count"] = 1

aggregation_functions = {'count': 'sum'}
nd = gr_df.groupby(['Gender', 'Race']).aggregate(aggregation_functions)

# Setting up the plot and dimension
fig, axs = plt.subplots() 
fig.set_figheight(30)
fig.set_figwidth(40)

b1 = sns.barplot(x="Gender", y ="count", hue="Race", palette = "Spectral", data=nd.reset_index(), ax = axs)
b1.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
          fancybox=True, shadow=True, ncol=3, labelspacing=2, fontsize = 20)

b1.set_title("The Occurrence of Traffic Violation Based on Gender and Race", fontsize = 40)
b1.set_ylabel("Count", fontsize = 30)
b1.set_xlabel("Gender", fontsize = 30)
b1.tick_params(axis='both', labelsize=25)

plt.show()

HeatMap Exploring the Occurrence based on Time (Hours)

In [14]:
sam["hour"] = [t.hour for t in sam["Time Of Stop"]]
cut = pd.cut(sam["hour"], bins = [0,2,4,6,8,10,12,14,16,18,20,22,24], 
             labels = [1,2,3,4,5,6,7,8,9,10,11,12], right = False)
sam["cut"] = cut
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
In [15]:
df_copy = sam.copy()
df_copy['count'] = 1
hr_map = generate_map()

hm_fg = []
hr = 0
for ind in range(12):
    temp_name = "Hours " + str(hr) + " to " + str(hr + 1)
    hm_fg.append(folium.FeatureGroup(name = temp_name, show= True if ind == 0 else False))
    hr += 2


# Group time together to have more during a specifc set of hours\
for index in range(12):    
    temp = df_copy[df_copy["cut"] == index + 1]
    HeatMap(data=temp[['Latitude', 'Longitude', 'count']]
                .groupby(['Latitude', 'Longitude', 'count'])
                .sum()
                .reset_index()
                .values.tolist(), 
                radius=8, max_zoom=13).add_to(hm_fg[index])
    
for fg in hm_fg:
    fg.add_to(hr_map)
        
    
folium.LayerControl().add_to(hr_map)

hr_map
Out[15]:

HeatMap Over Time

In [16]:
time_map = generate_map()
df_hour_list = []
for hour in df_copy["cut"].sort_values().unique():
    df_hour_list.append(df_copy.loc[df_copy.hour == hour, ['Latitude', 'Longitude', 'count']]
                        .groupby(['Latitude', 'Longitude']).sum().reset_index().values.tolist())

HeatMapWithTime(df_hour_list, radius=8, gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'}, 
                min_opacity=0.5, max_opacity=0.8, use_local_extrema=True, auto_play=True).add_to(time_map)

folium.LayerControl().add_to(time_map)

time_map
Out[16]:

Total HeatMap

In [17]:
df_copy = sam.copy()
df_copy['count'] = 1
base_map = generate_map()

HeatMap(data=df_copy[['Latitude', 'Longitude', 'count']]
            .groupby(['Latitude', 'Longitude', 'count'])
            .sum()
            .reset_index()
            .values.tolist(), 
            radius=8, max_zoom=13).add_to(base_map)

folium.LayerControl().add_to(base_map)

base_map
Out[17]:

Exploring the Vehicle Type and Year

In [18]:
##### VehicleType, Year, Make
vy_df = sam.copy()
vy_df['count'] = 1
vy_df = vy_df.sort_values("Year")

aggregation_functions = {'count': 'sum'}
nd2 = vy_df.groupby(['Year', 'VehicleType']).aggregate(aggregation_functions)


# Setting up the plot and dimension
fig, axs = plt.subplots() 
fig.set_figheight(30)
fig.set_figwidth(40)

l = sns.lineplot(x = "Year", y = "count", hue = "VehicleType",data = nd2.reset_index(), ax = axs)
l.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
          fancybox=True, shadow=True, ncol=10, labelspacing=2, fontsize = 18)

l.set_title("The Occurrence of Traffic Violation Based on the Vehicle Type and Year", fontsize = 40)
l.set_ylabel("Occurrence", fontsize = 30)
l.set_xlabel("Year", fontsize = 30)
l.tick_params(axis='both', labelsize=25)

plt.show()

Can We Predict Where A Violation Might Occur?

In [19]:
district = np.unique(original["SubAgency"])
simp_district = {}

for dist in district:
    if dist[0] == "H":
        simp_district[dist] = 7
    else:
        simp_district[dist] = int(dist[0])
In [20]:
race_num = {'ASIAN': 0, 
            'BLACK': 1, 
            'HISPANIC': 2, 
            'NATIVE AMERICAN': 3, 
            'OTHER': 4, 
            'WHITE': 5} 
vt = {"Warning": 1,
      "Citation": 2,
      "ESERO": 3,
      "SERO": 4}
In [21]:
data_reg = sam.copy()

data_reg["district_num"] = [simp_district[d] for d in data_reg["SubAgency"]]
#data_reg = pd.get_dummies(data_reg, columns = ["Race"])
#data_reg["Race_NATIVE"] = data_reg["Race_NATIVE AMERICAN"]
data_reg["race_num"] = [race_num[r] for r in data_reg["Race"]]
data_reg["violation_type_num"] = [vt[v] for v in data_reg["Violation Type"]]
data_reg["gender_num"] = [0 if g == "M" else 1 for g in data_reg["Gender"]] 

data_reg["month"] = [m.month for m in sam["Date Of Stop"]]
data_reg["day"] = [d.day for d in sam["Date Of Stop"]]

data_reg
Out[21]:
Date Of Stop Time Of Stop SubAgency Description Location Latitude Longitude Accident Belts Personal Injury ... Gender DL State hour cut district_num race_num violation_type_num gender_num month day
0 2016-05-01 23:08:00 4th district, Wheaton DRIVER FAIL TO STOP AT RED TRAFFIC SIGNAL BEFO... GEORGIA AVE AT GLENALLAN AVE 39.063522 -77.055263 No No No ... M MD 23 12 4 2 2 0 5 1
1 2017-11-30 01:14:00 3rd district, Silver Spring DRIVING VEHICLE IN EXCESS OF REASONABLE AND PR... RANDOLPH ROAD AT TAMARACK RD 39.067270 -76.984982 No No No ... M MD 1 1 3 1 2 0 11 30
2 2012-05-24 10:48:00 3rd district, Silver Spring DRIVER USING HANDS TO USE HANDHELD TELEPHONE W... WAYNE AVE / COLESVILLE RD, W/B 38.995165 -77.031199 No No No ... F MD 10 6 3 2 2 1 5 24
3 2012-10-18 03:30:00 3rd district, Silver Spring DRIVING TO DRIVE MOTOR VEHICLE ON HIGHWAY WITH... COLUMBIA PIKE AT LORRAIN AVE 38.998501 -77.026377 No No No ... M WV 3 2 3 1 2 0 10 18
4 2012-02-16 07:47:00 6th district, Gaithersburg / Montgomery Village EXCEEDING MAXIMUM SPEED: 34 MPH IN A POSTED 25... GAME PRESERVE RD N/B (11400 BLOCK) 39.157287 -77.239419 No No No ... F MD 7 4 6 2 2 1 2 16
5 2015-09-18 14:19:00 5th district, Germantown DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... OBSERVATION DR AND SENECA MEADOWS PARKWA 39.198632 -77.253483 No No No ... M MD 14 8 5 4 1 0 9 18
6 2014-09-05 01:26:00 3rd district, Silver Spring DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... COLUMBIA PIKE @ FAIRLAND RD 39.070163 -76.952160 No No No ... M MD 1 1 3 5 1 0 9 5
7 2016-11-14 08:08:00 3rd district, Silver Spring DRIVING VEHICLE ON HIGHWAY WITHOUT CURRENT REG... TAMARACK RD / E RANDOLPH RD 39.046277 -76.990695 No No No ... M MD 8 5 3 1 2 0 11 14
8 2016-05-07 21:17:00 3rd district, Silver Spring FAILURE TO CONTROL VEHICLE SPEED ON HIGHWAY TO... NB NEW HAMPSHIRE AVE @ OAKVIEW DR 39.014340 -77.034123 No No No ... M MD 21 11 3 5 2 0 5 7
9 2018-02-06 17:31:00 1st district, Rockville FAILURE OF LICENSEE TO NOTIFY ADMINISTRATION O... I-270 PRIOR TO MONTROSE 39.035290 -77.143953 No No No ... F MD 17 9 1 1 1 1 2 6
10 2015-10-20 17:10:00 5th district, Germantown DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... OBSERVATION DR @ SHAKESPEARE BLVD 39.201037 -77.252962 No No No ... F MD 17 9 5 4 1 1 10 20
11 2015-12-26 14:11:00 5th district, Germantown DRIVER FAILURE TO STOP AT INTERSECTION HWY. ST... WISTERIA DR/GERMANTOWN RD 39.177412 -77.271238 No No No ... F MD 14 8 5 0 2 1 12 26
12 2014-12-11 04:58:00 4th district, Wheaton DRIVING VEHICLE IN EXCESS OF REASONABLE AND PR... EB BROOKEVILLE ROAD/ZION ROAD 39.184595 -77.089718 No No No ... M MD 4 3 4 2 1 0 12 11
13 2015-09-23 14:18:00 1st district, Rockville EXCEEDING MAXIMUM SPEED: 49 MPH IN A POSTED 40... SB SHADY GROVE RD AT SILVER BELL TER 39.087512 -77.211753 No No No ... F MD 14 8 1 1 2 1 9 23
14 2016-03-12 08:02:00 4th district, Wheaton FAILURE TO DISPLAY REGISTRATION CARD UPON DEMA... CONNECTICUT AVE/ RANDOLPH RD 39.056553 -77.073752 No No No ... M MD 8 5 4 4 2 0 3 12
15 2015-04-27 15:10:00 4th district, Wheaton DRIVER FAILURE TO STOP AT STOP SIGN LINE ENNALLS AVE @ GRANDVIEW AVE 39.041407 -77.053135 No No No ... F MD 15 8 4 1 1 1 4 27
16 2015-12-21 17:11:00 2nd district, Bethesda DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... ARLINGTON RD/ ELM 38.982033 -77.100048 No No No ... F MD 17 9 2 5 1 1 12 21
17 2014-03-18 19:07:00 1st district, Rockville DRIVING VEH. ON HWY. WITH UNPAID REGISTRATION FEE W MONTGOMERY AVE AT NELSON ST 39.085607 -77.171098 No No No ... M MD 19 10 1 5 1 0 3 18
18 2017-05-17 10:43:00 1st district, Rockville EXCEEDING POSTED MAXIMUM SPEED LIMIT: 39 MPH I... N/B TRAVILAH RD @ BRUSHWOOD TERR 39.064230 -77.270002 No No No ... F MD 10 6 1 0 2 1 5 17
19 2018-01-07 11:30:00 5th district, Germantown EXCEEDING THE POSTED SPEED LIMIT OF 40 MPH FREDERICK ROAD/WHEATFIELD DR 39.167680 -77.229683 No No No ... M MD 11 6 5 1 1 0 1 7
20 2015-09-12 00:02:00 3rd district, Silver Spring FAILURE OF LICENSEE TO NOTIFY ADMINISTRATION O... BRIGGS CHANEY DR / CASTLE BLVD 39.078653 -76.944692 No No No ... F MD 0 1 3 1 1 1 9 12
21 2016-10-27 19:47:00 4th district, Wheaton PERSON DRIVING MOTOR VEHICLE ON HIGHWAY OR PUB... MONTGOMERY VILLAGE AVE / LOST KNIFE RD 39.157830 -77.204707 No No No ... M VA 19 10 4 1 2 0 10 27
22 2018-04-16 18:37:00 4th district, Wheaton DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... NORBECK ROAD AND LLEWELLYN MANOR WAY 39.118427 -77.027362 No No No ... M MD 18 10 4 5 1 0 4 16
23 2015-11-06 08:43:00 4th district, Wheaton EXCEEDING MAXIMUM SPEED: 54 MPH IN A POSTED 45... NORBECK RD AT NORWOOD RD 39.118655 -77.022628 No No No ... M MD 8 5 4 2 2 0 11 6
24 2013-11-01 13:10:00 4th district, Wheaton EXCEEDING THE POSTED SPEED LIMIT OF 30 MPH TIDEWATER CT / RT 97 39.157913 -77.065333 No No No ... M MD 13 7 4 5 1 0 11 1
25 2015-10-22 06:20:00 2nd district, Bethesda DRIVER FAILURE TO STOP AT INTERSECTION HWY. ST... MARINELLI RD AND CITADEL AVE 38.983452 -77.092920 No No No ... M MD 6 4 2 5 2 0 10 22
26 2016-05-28 21:17:00 Headquarters and Special Operations DRIVING VEHICLE IN EXCESS OF REASONABLE AND PR... SB I270 @ OLD GEORGETOWN ROAD 39.111247 -77.231373 No No No ... M MD 21 11 7 1 2 0 5 28
27 2016-06-30 07:07:00 6th district, Gaithersburg / Montgomery Village OPERATING VEHICLE ON HIGHWAY WITH UNAUTHORIZED... TRAVIS @ WATKINS MILL 39.159982 -77.217153 No No No ... F MD 7 4 6 5 1 1 6 30
28 2013-08-31 00:00:00 2nd district, Bethesda FAILURE TO PROPERLY EQUIP & LOCATE HEADLAMPS O... ROCKVILLE PIKE / NICHOLSON LN 39.043523 -77.111100 No No No ... M MD 0 1 2 1 1 0 8 31
29 2012-05-10 20:39:00 6th district, Gaithersburg / Montgomery Village PERSON DRIVING MOTOR VEHICLE WHILE LICENSE SUS... CENTURY BLVD/MIDDLEBROOK RD 39.180432 -77.267659 No No No ... M MD 20 11 6 2 2 0 5 10
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4970 2013-06-18 03:47:00 5th district, Germantown DRIVER ENTERING INTERSECTION AT FLASHING RED T... CLOPPER/118 39.161580 -77.281001 No No No ... M MD 3 2 5 1 1 0 6 18
4971 2014-01-25 03:03:00 3rd district, Silver Spring DRIVER ENTERING INTERSECTION AT FLASHING RED T... WAYNE AVE / DALE DR 38.997325 -77.021728 No No No ... M XX 3 2 3 2 2 0 1 25
4972 2015-11-26 00:06:00 Headquarters and Special Operations FAILURE TO DISPLAY REGISTRATION CARD UPON DEMA... NB 29 @ GEORGIA AVE 38.996997 -77.027365 No No No ... F PA 0 1 7 1 1 1 11 26
4973 2016-08-01 10:42:00 1st district, Rockville DRIVING VEHICLE IN EXCESS OF REASONABLE AND PR... GAINSBOROUGH RD/HOBNAIL CT 39.035310 -77.174157 No No No ... F MD 10 6 1 5 1 1 8 1
4974 2015-01-05 00:45:00 3rd district, Silver Spring DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... UNIVERSITY BLVD E @ PINEY BRANCH RD 38.998933 -76.995555 No No No ... M XX 0 1 3 1 2 0 1 5
4975 2018-04-20 03:00:00 Headquarters and Special Operations NEGLIGENT DRIVING VEHICLE IN CARELESS AND IMPR... WOODFIELD RD / BRENISH DR 39.178014 -77.150682 No No No ... M MD 3 2 7 2 2 0 4 20
4976 2017-06-08 20:32:00 3rd district, Silver Spring FAILURE OF INDIVIDUAL DRIVING ON HIGHWAY TO DI... FLOWER AVE / PINEY BRANCH 38.999515 -77.003488 No No No ... M XX 20 11 3 2 2 0 6 8
4977 2015-06-02 09:35:00 4th district, Wheaton FAILURE TO STOP AT STOP SIGN OLNEY GIANT PARKING LOT 39.149880 -77.065773 No No No ... M MD 9 5 4 5 1 0 6 2
4978 2013-04-27 02:46:00 5th district, Germantown DRIVER FAIL TO STOP AT FLASHING RED TRAFFIC SI... SB GREAT SENECA HIGHWAY/MUDDY BRANCH RD 39.106537 -77.209083 No No No ... M MD 2 2 5 1 2 0 4 27
4979 2017-11-08 11:28:00 4th district, Wheaton DRIVING MOTOR VEHICLE ON HIGHWAY WITHOUT REQUI... UNIVERSITY BLVD W / ELKIN ST 39.042415 -77.049907 No No No ... M XX 11 6 4 2 2 0 11 8
4980 2017-01-05 19:55:00 3rd district, Silver Spring DRIVING VEH. W/O ADEQUATE REAR REG. PLATE ILLU... ELLINGTON BLVD @ FIELDS RD 39.112947 -77.198840 No No No ... F MD 19 10 3 2 1 1 1 5
4981 2013-11-24 23:11:00 2nd district, Bethesda DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... NORWOOD RD/ LAYHILL RD 39.124120 -77.029270 No No No ... M MD 23 12 2 1 1 0 11 24
4982 2015-11-21 12:53:00 5th district, Germantown DRIVER FAILURE TO YIELD RIGHT-OF-WAY TO VEH. U... MIDDLEBROOK RD/CELEBRATION WAY 39.181265 -77.269738 No No Yes ... F MD 12 7 5 5 1 1 11 21
4983 2017-01-09 18:56:00 4th district, Wheaton MODIFYING EXHAUST SYSTEM FOR VEH. CAUSING EXCE... ATHERTON DRIVE AND RANDOLPH ROAD 39.057200 -77.076870 No No No ... M MD 18 10 4 2 2 0 1 9
4984 2014-04-21 14:13:00 4th district, Wheaton EXCEEDING THE POSTED SPEED LIMIT OF 45 MPH NORBECK RD @ FIRESTONE DR 39.121535 -77.009870 No No No ... M MD 14 8 4 5 1 0 4 21
4985 2016-06-09 13:25:00 2nd district, Bethesda DRIVER WRITING,SENDING,READING A TEXT,ELECTRON... WOODMONT AVE/NORFOLK AVE 38.988455 -77.096255 No No No ... M MD 13 7 2 1 2 0 6 9
4986 2013-06-08 08:39:00 4th district, Wheaton EXCEEDING MAXIMUM SPEED: 49 MPH IN A POSTED 40... WB RANDOLPH RD PRIOR TO LOCKSLEY RD LA 1 39.070908 -77.012565 No No No ... M MD 8 5 4 5 2 0 6 8
4987 2015-04-25 11:37:00 3rd district, Silver Spring DRIVING VEH. WHILE WEARING EARPHONES UNIVERSITY BLVD EAST AT ST LAWRENCE DR 39.018603 -77.007717 No No No ... M MD 11 6 3 1 1 0 4 25
4988 2014-10-04 11:06:00 Headquarters and Special Operations PERMITTING VEH. TO BE DRIVEN ON HWY. W/O CURRE... FIRSTFIELD / QUINCE ORCHARD 39.147118 -77.233142 No No No ... F MD 11 6 7 5 1 1 10 4
4989 2016-08-16 17:23:00 3rd district, Silver Spring DRIVER USING HANDS TO USE HANDHELD TELEPHONE W... SB 29 AT TECH RD 39.059208 -76.966912 No No No ... M FL 17 9 3 5 1 0 8 16
4990 2016-05-28 07:37:00 3rd district, Silver Spring DRIVER USING HANDS TO USE HANDHELD TELEPHONE W... 29 / MUSGROVE RD 39.070028 -76.957820 No No No ... M MD 7 4 3 2 2 0 5 28
4991 2015-04-23 08:02:00 6th district, Gaithersburg / Montgomery Village DRIVER USING HANDS TO USE HANDHELD TELEPHONE W... OAKMONT AVE @ RAILROAD ST 39.132478 -77.175253 No No No ... M MD 8 5 6 2 2 0 4 23
4992 2015-03-08 01:44:00 4th district, Wheaton DRIVING VEHICLE IN EXCESS OF REASONABLE AND PR... GEORGIA AVE @ ARCOLA AVE 39.045305 -77.052255 No No No ... M MD 1 1 4 1 2 0 3 8
4993 2012-11-07 19:54:00 1st district, Rockville DRIVING VEHICLE ON HIGHWAY WITHOUT CURRENT REG... RT.118 @ I-270 39.184418 -77.254575 No No No ... F MD 19 10 1 5 1 1 11 7
4994 2017-02-25 00:16:00 6th district, Gaithersburg / Montgomery Village DRIVING VEHICLE ON HIGHWAY WITHOUT CURRENT REG... QUINCE ORCHARD RD / DARNESTOWN RD 39.117505 -77.252425 No No No ... M MD 0 1 6 4 1 0 2 25
4995 2013-12-05 10:50:00 5th district, Germantown DRIVER WRITING,SENDING,READING A TEXT,ELECTRO... S/B 270 @ EXIT 6 39.120547 -77.198065 No No No ... F MD 10 6 5 5 1 1 12 5
4996 2015-09-27 18:15:00 5th district, Germantown OPERATOR NOT RESTRAINED BY SEATBELT SHAKESPERRE RD/OBSERVATION DR 39.198568 -77.253565 No No No ... M MD 18 10 5 5 1 0 9 27
4997 2017-02-03 23:51:00 4th district, Wheaton DRIVER TURNING VEHICLE WITHOUT GIVING APPROPRI... GEORGIA AVE/ RANDOLPH RD 39.056312 -77.049788 No No No ... M MD 23 12 4 2 1 0 2 3
4998 2015-07-02 23:18:00 1st district, Rockville FAILURE OF VEH. ON HWY. TO DISPLAY LIGHTED LAM... 5405 TUCKERMAN LA 39.030070 -77.109265 No No No ... F MD 23 12 1 1 2 1 7 2
4999 2017-09-07 12:04:00 5th district, Germantown DRIVER FAIL TO MAKE REQUIRED STOP AT SIGN. MATENY RD/DAIRYMAID DR 39.151423 -77.268592 No No No ... F MD 12 7 5 2 1 1 9 7

4984 rows × 30 columns

In [22]:
#mlr = sm.ols(formula = 'lifeExp ~ year + continent_Asia + continent_Africa + continent_Europe + continent_Americas + continent_Oceania + year*continent_Asia + year*continent_Africa + year*continent_Europe + year*continent_Americas + year*continent_Oceania', data = data).fit()   
distlr = sm.ols(formula = 'district_num ~ hour + race_num + gender_num + violation_type_num', data = data_reg).fit()   
distlr.summary()


des = data_reg[["race_num", "gender_num", "violation_type_num"]].copy()
des["bias"] = 1
des = des[["bias","race_num", "gender_num", "violation_type_num"]]
dis = data_reg["district_num"]


X = np.asmatrix(des.to_numpy())
R = np.asmatrix(dis.to_numpy())
print (R)
A = np.linalg.inv(X.T * X) * X.T

W = (A * R.T).A1

W
[[4 3 3 ... 4 1 5]]
Out[22]:
array([ 3.96055502, -0.06350476, -0.10527518, -0.11471471])
In [23]:
pred = W[0] + W[1]*des["race_num"] + W[2]*des["gender_num"] + W[3]*des["violation_type_num"]

pred
Out[23]:
0       3.604116
1       3.667621
2       3.498841
3       3.667621
4       3.498841
          ...   
4995    3.423041
4996    3.528317
4997    3.718831
4998    3.562346
4999    3.613556
Length: 4984, dtype: float64
In [24]:
sp = data_reg.sample(n = 500)
In [25]:
# Setting up the plot and dimension
fig, axs = plt.subplots(nrows = 1)
fig.set_figheight(15)
fig.set_figwidth(18)

predict = distlr.predict({"hour": sp["hour"], 
                        "race_num": sp['race_num'], "gender_num": sp['gender_num'],
                        "violation_type_num": sp['violation_type_num']})

diff = sns.lineplot(x = predict, y =sp["district_num"], ax = axs)

plt.show()
predict
Out[25]:
818     3.476435
4430    3.415727
1052    3.355450
2901    3.543501
3237    3.438043
          ...   
1526    3.824553
4380    3.382867
2441    3.584576
3311    3.506534
1608    3.721990
Length: 500, dtype: float64
In [ ]: